home *** CD-ROM | disk | FTP | other *** search
- XDEF _c2p4
- XDEF _Initc2p4
- XDEF _Exitc2p4
-
- ; ---------------------------------------------------------------------
- ; void c2p4 (UBYTE *fBUFFER,
- ; UBYTE *fBUFFER_CMP,
- ; PLANEPTR *planes,
- ; struct Task *task,
- ; ULONG signals);
- ;
- ; 4-plane unpacked chunky to planar converter.
- ; Optimised for 68020/30 with fastmem.
- ;
- ; Author: Peter McGavin (e-mail peterm@maths.grace.cri.nz), 6 April 1994
- ; Based on James McCoull's 4-pass blitter algorithm.
- ;
- ; This code is public domain.
- ;
- ; Use chunky comparison buffer. Return immediately if no diffs found.
- ; Perform first 2 passes (Fast->Chip) with the CPU (in 1 pass).
- ; Update chunky comparison buffer.
- ; Perform passes 3 & 4 with QBlit().
- ; Return immediately after launching blits.
- ; Signal task from CleanUp() on completion.
- ; Task should wait for signal before next call to c2p4().
- ;
- ; (Unimplemented speedup idea: Might be possible to signal task after pass 3,
- ; but will probably need another Wait() somewhere.)
- ;
- ; Approx timing (A4000/030, 320x200x4):
- ; CPU pass max 18ms (then return)
- ; Asynchronous blitter passes add 31ms
- ;
- ; Example usage:
- ;
- ; /* clear fBUFFER, fBUFFER_CMP, and planes here */
- ; if ((sigbit = AllocSignal(-1)) == -1)
- ; die ("Can't allocate signal!\n");
- ; safe = TRUE;
- ; for (;;) {
- ; ... /* render to fBUFFER here */
- ; if (!safe) {
- ; Wait (1<<sigbit); // wait for previous c2p4 to finish
- ; safe = TRUE;
- ; }
- ; c2p4 (fBUFFER, fBUFFER_CMP, &RASTPORT->BitMap->Planes[0],
- ; FindTask(NULL), 1<<sigbit);
- ; safe = FALSE;
- ; }
- ; if (!safe)
- ; Wait (1<<sigbit); // wait for last c2p4 to finish
- ; FreeSignal(sigbit);
- ;
- ; <20.Jan.95: Angepaßt für den Frodo C64-Emulator und an OCS
- ; von Christian Bauer>
-
-
- ; Display geometry. All buffer sizes, loop counts and blit sizes below are
- ; derived from width and height.
- width equ $180 ; pixels per line; must be a multiple of 32
- height equ $110 ; number of lines; used as the row count of the pass-4 blits
- toplinestoskip equ 0 ; lines left untouched at the top of each plane
-
- plsiz equ (width/8)*height ; bytes in one bitplane
- pixels equ width*height ; chunky pixels per frame (one byte each)
- offset equ (width/8)*toplinestoskip ; byte offset added to each plane pointer
-
- cleanup equ $40 ; value placed in the stat byte of mybltnode
- ; NOTE(review): presumably the CLEANUP flag of struct bltnode -- confirm
- ; against hardware/blit.i
-
- INCLUDE "exec/types.i"
- INCLUDE "exec/macros.i"
- INCLUDE "exec/memory.i"
- INCLUDE "graphics/gfxbase.i"
- INCLUDE "hardware/custom.i"
-
- XREF _SysBase
- XREF _GfxBase
-
- SECTION "text",CODE
-
- ; ---------------------------------------------------------------------
- ; _Initc2p4 -- choose the blit chain for the installed chip set and
- ;              allocate the intermediate buffer buff2.
- ;
- ; GFXB_BIG_BLITS clear (OCS): the blit chain starts at blit43,
- ;   wehaveocs = -1 and buff2 is taken from MEMF_ANY memory.
- ; GFXB_BIG_BLITS set: the blit chain starts at blit31, wehaveocs = 0
- ;   and buff2 is taken from MEMF_CHIP memory.
- ;
- ; Out: d0 = result of AllocVec(), also stored in buff2ptr
- ;      (non-zero means OK).
- ; a6 is preserved.
- ; ---------------------------------------------------------------------
- _Initc2p4       move.l  a6,-(sp)
-                 move.l  _GfxBase,a0
-                 btst    #GFXB_BIG_BLITS,gb_ChipRevBits0(a0)
-                 bne     1$
-
-                 lea     blit43,a0               ; OCS: queue pass 4 only
-                 move.w  #-1,wehaveocs
-                 move.l  #MEMF_ANY,d1
-                 bra     2$
-
- 1$              lea     blit31,a0               ; big blits: queue passes 3 and 4
-                 clr.w   wehaveocs
-                 move.l  #MEMF_CHIP,d1
-
- ; common tail: a0 = first blit function, d1 = memory requirements
- 2$              move.l  a0,qblitfunc
-                 move.l  a0,initblitfunc
-                 move.l  #pixels/2,d0
-                 move.l  _SysBase,a6
-                 JSRLIB  AllocVec
-                 move.l  d0,buff2ptr
-                 move.l  (sp)+,a6
-                 rts
-
- ; ---------------------------------------------------------------------
- ; _Exitc2p4 -- release the buffer allocated by _Initc2p4.
- ;
- ; Does nothing when buff2ptr is NULL. buff2ptr is cleared before the
- ; memory is handed back, so calling this routine a second time cannot
- ; free the same block twice and no stale pointer is left behind.
- ; a6 is preserved.
- ; ---------------------------------------------------------------------
- _Exitc2p4       move.l  a6,-(sp)
-                 move.l  buff2ptr,d0
-                 beq     1$                      ; nothing was allocated
-                 clr.l   buff2ptr                ; forget the block before freeing it
-                 move.l  d0,a1
-                 move.l  _SysBase,a6
-                 JSRLIB  FreeVec
- 1$              move.l  (sp)+,a6
-                 rts
-
- ; _c2p4 entry: fetch the five stack arguments, record them in the private
- ; fields behind mybltnode, then scan for the first 32-pixel group that
- ; differs from the comparison buffer. If nothing differs the task is
- ; signalled directly and no blit is queued.
- _c2p4 movem.l d2-d7/a2-a6,-(sp) ; 11 registers = 44 bytes pushed
-
- movem.l 4+44(sp),a2-a5 ; a2=fBUFFER a3=fBUFFER_CMP a4=planes a5=task
- move.l 20+44(sp),d0 ; d0 = signals
-
- ; save arguments
-
- move.l #mybltnode,a0
- move.l a2,(chunky-mybltnode,a0)
- move.l a4,(planes-mybltnode,a0)
- move.l a5,(task-mybltnode,a0)
- move.l d0,(signals-mybltnode,a0)
-
- ;-------------------------------------------------
- ;original chunky data
- ;0 ........a3a2a1a0 ........b3b2b1b0
- ;2 ........c3c2c1c0 ........d3d2d1d0
- ;4 ........e3e2e1e0 ........f3f2f1f0
- ;6 ........g3g2g1g0 ........h3h2h1h0
- ;8 ........i3i2i1i0 ........j3j2j1j0
- ;10 ........k3k2k1k0 ........l3l2l1l0
- ;12 ........m3m2m1m0 ........n3n2n1n0
- ;14 ........o3o2o1o0 ........p3p2p1p0
- ;16 ........q3q2q1q0 ........r3r2r1r0
- ;18 ........s3s2s1s0 ........t3t2t1t0
- ;20 ........u3u2u1u0 ........v3v2v1v0
- ;22 ........w3w2w1w0 ........x3x2x1x0
- ;24 ........y3y2y1y0 ........z3z2z1z0
- ;26 ........A3A2A1A0 ........B3B2B1B0
- ;28 ........C3C2C1C0 ........D3D2D1D0
- ;30 ........E3E2E1E0 ........F3F2F1F0
- ;-------------------------------------------------
-
- move.l buff2ptr,a4 ; a4 -> buff2 (output of the CPU pass)
- move.l #$00ff00ff,d7 ; mask: low byte of each word, used by the conversion
- move.w #pixels/32,d6 ; number of 32-pixel groups to process
-
- bra.b end_pass1loop ; enter at the dbra so the body runs pixels/32 times
-
- CNOP 0,4 ; longword-align the loop
-
- ; main loop (starts here) processes 32 chunky pixels at a time
- ; compare next 32 pixels with compare page, looking for differences
- ; cmpm post-increments a2 and a3; the fixN handlers undo that on a mismatch
-
- initpass1loop: cmpm.l (a2)+,(a3)+
- bne.w fix1
- cmpm.l (a2)+,(a3)+
- bne.w fix2
- cmpm.l (a2)+,(a3)+
- bne.b fix3
- cmpm.l (a2)+,(a3)+
- bne.b fix4
- cmpm.l (a2)+,(a3)+
- bne.b fix5
- cmpm.l (a2)+,(a3)+
- bne.b fix6
- cmpm.l (a2)+,(a3)+
- bne.b fix7
- cmpm.l (a2)+,(a3)+
- bne.b fix8
-
- addq.l #8,a4 ; skip 8 bytes in output
-
- end_pass1loop: dbra d6,initpass1loop
-
- ; If we get to here then no difference was found.
- ; Signal the task and return.
- ; a0 still points at mybltnode: nothing in the loop above modifies it.
-
- move.l (task-mybltnode,a0),a1
- move.l (signals-mybltnode,a0),d0
- move.l (4).w,a6 ; library base read from address 4
- JSRLIB Signal
-
- movem.l (sp)+,d2-d7/a2-a6
- rts
-
- ; This becomes the main loop after the first difference is found
- ; Same comparison as initpass1loop, but when the counter runs out control
- ; goes to done (the blits are launched) instead of returning early.
-
- pass1loop: cmpm.l (a2)+,(a3)+
- bne.b fix1
- cmpm.l (a2)+,(a3)+
- bne.b fix2
- cmpm.l (a2)+,(a3)+
- bne.b fix3
- cmpm.l (a2)+,(a3)+
- bne.b fix4
- cmpm.l (a2)+,(a3)+
- bne.b fix5
- cmpm.l (a2)+,(a3)+
- bne.b fix6
- cmpm.l (a2)+,(a3)+
- bne.b fix7
- cmpm.l (a2)+,(a3)+
- bne.b fix8
-
- addq.l #8,a4 ; skip 8 bytes in output
-
- dbra d6,pass1loop
-
- bra.w done
-
- ; difference found, restore a2 and a3
- ; fixN is reached after N longword compares, i.e. with a2/a3 advanced by
- ; 4*N bytes. Each entry rewinds both pointers to the start of the group;
- ; even-numbered entries subtract 4 and fall into the odd entry below.
-
- fix8: subq.l #4,a2
- subq.l #4,a3
- fix7: sub.w #28,a2 ; 28 (+4 from fix8 = 32)
- sub.w #28,a3
- bra.b go_c2p
-
- fix6: subq.l #4,a2
- subq.l #4,a3
- fix5: sub.w #20,a2 ; 20 (+4 from fix6 = 24)
- sub.w #20,a3
- bra.b go_c2p
-
- fix4: subq.l #4,a2
- subq.l #4,a3
- fix3: sub.w #12,a2 ; 12 (+4 from fix4 = 16)
- sub.w #12,a3
- bra.b go_c2p
-
- fix2: subq.l #4,a2
- subq.l #4,a3
- fix1: subq.l #4,a2 ; 4 (+4 from fix2 = 8); falls through into go_c2p
- subq.l #4,a3
-
- ; convert 32 pixels (passes 1 and 2 combined)
- ;
- ; In:  a2 -> 32 chunky bytes, a3 -> matching entry of the compare buffer,
- ;      a4 -> output position in buff2, d6 = group counter, d7 = $00ff00ff
- ; Out: a2 and a3 advanced by 32, a4 advanced by 8. Eight bytes are written
- ;      at the old a4 and eight more pixels/4 bytes beyond them.
- ; Clobbers d0-d5, a0, a1, a5, a6.
- ;
- ; The compare buffer receives the chunky data exactly as it was read,
- ; BEFORE the upper nibbles are masked off. Storing the masked copy of the
- ; first four longwords (while the last four were stored unmasked) meant
- ; that a group with any upper-nibble bit set in its first 16 bytes could
- ; never compare equal again and was reconverted on every call.
-
- go_c2p:         movem.l (a2)+,d0-d3/a0/a1/a5/a6 ; ABCD EFGH IJKL MNOP QRST UVWX YZ01 2345
-
-                 movem.l d0-d3/a0/a1/a5/a6,(a3)  ; update compare buffer (raw data)
-                 adda.w  #32,a3
-
-                 move.l  #$0f0f0f0f,d4           ; clear the upper nibbles
-                 and.l   d4,d0
-                 and.l   d4,d1
-                 and.l   d4,d2
-                 and.l   d4,d3
-
-                 lsl.l   #4,d0                   ; A.B.C.D.
-                 move.l  d0,d4                   ; A.B.C.D.
-                 and.l   d7,d4                   ; ..B...D.
-                 eor.l   d4,d0                   ; A...C...
-
-                 move.l  d1,d5                   ; .E.F.G.H
-                 and.l   d7,d5                   ; ...F...H
-                 eor.l   d5,d1                   ; .E...G..
-
-                 or.l    d1,d0                   ; AE..CG..
-                 or.l    d5,d4                   ; ..BF..DH
-
-                 move.l  d2,d1                   ; .I.J.K.L
-                 and.l   d7,d1                   ; ...J...L
-
-                 move.l  d3,d5                   ; .M.N.O.P
-                 and.l   d7,d5                   ; ...N...P
-
-                 lsl.l   #4,d4                   ; .BF..DH.
-                 or.l    d1,d4                   ; .BFJ.DHL
-                 lsl.l   #4,d4                   ; BFJ.DHL.
-                 or.l    d5,d4                   ; BFJNDHLP
-
-                 move.l  d4,(pixels/4,a4)        ; second half of buff2
-
-                 eor.l   d5,d3                   ; .M...O..
-                 lsr.l   #4,d3                   ; ..M...O.
-                 eor.l   d1,d2                   ; .I...K..
-                 or.l    d3,d2                   ; .IM..KO.
-                 lsr.l   #4,d2                   ; ..IM..KO
-                 or.l    d2,d0                   ; AEIMCGKO
-
- ; bring the second 16 pixels (parked in address registers) into d0-d3
-
-                 move.l  a6,d3
-                 move.l  a5,d2
-                 move.l  a1,d1
-
-                 move.l  d0,(a4)+                ; first half of buff2
-
-                 move.l  a0,d0
-
-                 move.l  #$0f0f0f0f,d4           ; clear the upper nibbles
-                 and.l   d4,d0
-                 and.l   d4,d1
-                 and.l   d4,d2
-                 and.l   d4,d3
-
-                 lsl.l   #4,d0                   ; Q.R.S.T.
-                 move.l  d0,d4                   ; Q.R.S.T.
-                 and.l   d7,d4                   ; ..R...T.
-                 eor.l   d4,d0                   ; Q...S...
-
-                 move.l  d1,d5                   ; .U.V.W.X
-                 and.l   d7,d5                   ; ...V...X
-                 eor.l   d5,d1                   ; .U...W..
-
-                 or.l    d1,d0                   ; QU..SW..
-                 or.l    d5,d4                   ; ..RV..TX
-
-                 move.l  d2,d1                   ; .Y.Z.0.1
-                 and.l   d7,d1                   ; ...Z...1
-
-                 move.l  d3,d5                   ; .2.3.4.5
-                 and.l   d7,d5                   ; ...3...5
-
-                 lsl.l   #4,d4                   ; .RV..TX.
-                 or.l    d1,d4                   ; .RVZ.TX1
-                 lsl.l   #4,d4                   ; RVZ.TX1.
-                 or.l    d5,d4                   ; RVZ3TX15
-
-                 move.l  d4,(pixels/4,a4)        ; second half of buff2
-
-                 eor.l   d5,d3                   ; .2...4..
-                 lsr.l   #4,d3                   ; ..2...4.
-                 eor.l   d1,d2                   ; .Y...0..
-                 or.l    d3,d2                   ; .Y2..04.
-                 lsr.l   #4,d2                   ; ..Y2..04
-                 or.l    d2,d0                   ; QUY2SW04
-
-                 move.l  d0,(a4)+                ; first half of buff2
-
-                 dbra    d6,pass1loop            ; next group, or fall through when finished
-
- ; start the blitter in the background for passes 3 & 4
- ; <OCS: only pass 4 is queued, pass 3 is done right here by the CPU>
-
- done:           tst.w   wehaveocs
-                 beq     3$                      ; blit chain performs pass 3 itself
-
- ; Pass 3, subpass 1 (CPU counterpart of blit31), ascending.
- ; For every pair of consecutive words in buff2:
- ;   out = (even word & $cccc) | ((odd word & $cccc) >> 2)
- ; fills the first half of buff3.
-
-                 move.l  buff2ptr,a0
-                 lea     buff3,a2
-                 move.w  #$cccc,d2
-                 move.w  #pixels/8-1,d7
- 1$              move.w  (a0)+,d0                ; even word of the pair
-                 move.w  (a0)+,d1                ; odd word of the pair
-                 and.w   d2,d0
-                 and.w   d2,d1
-                 lsr.w   #2,d1
-                 or.w    d1,d0
-                 move.w  d0,(a2)+
-                 dbra    d7,1$
-
- ; Pass 3, subpass 2 (CPU counterpart of blit32), descending from the end
- ; of both buffers:
- ;   out = ((even word & $3333) << 2) | (odd word & $3333)
- ; fills the second half of buff3.
-
-                 move.l  buff2ptr,a0
-                 add.l   #pixels/2,a0
-                 lea     buff3+pixels/2,a2
-                 move.w  #$3333,d2
-                 move.w  #pixels/8-1,d7
- 2$              move.w  -(a0),d1                ; odd word of the pair
-                 move.w  -(a0),d0                ; even word of the pair
-                 and.w   d2,d0
-                 lsl.w   #2,d0
-                 and.w   d2,d1
-                 or.w    d1,d0
-                 move.w  d0,-(a2)
-                 dbra    d7,2$
-
- ; queue the blit node; the remaining passes run asynchronously
-
- 3$              lea     mybltnode,a1
-                 move.l  _GfxBase,a6
-                 JSRLIB  QBlit
-
-                 movem.l (sp)+,d2-d7/a2-a6
-                 rts
-
- ;-----------------------------------------------------------------------------
- ; QBlit functions (called asynchronously)
-
- ;-------------------------------------------------
- ;after pass 2
- ;0 a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0
- ;2 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
- ;4 q3q2q1q0u3u2u1u0 y3y2y1y0C3C2C1C0
- ;6 s3s2s1s0w3w2w1w0 A3A2A1A0E3E2E1E0
- ;
- ;pixels/4+0 b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0
- ;pixels/4+2 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
- ;pixels/4+4 r3r2r1r0v3v2v1v0 z3z2z1z0D3D2D1D0
- ;pixels/4+6 t3t2t1t0x3x2x1x0 B3B2B1B0F3F2F1F0
- ;-------------------------------------------------
-
- ; blit31 -- pass 3, subpass 1 (ascending)
- ;
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff2, B = buff2+2, D = buff3
- ;   A and B modulo 2, D modulo 0
- ;   C data $cccc, first/last word masks all ones
- ;   bltcon0/1 = $0DE4/$2000     D = AC + (B>>2)~C
- ;   bltsizv = pixels/8 rows, bltsizh = 1 word (this write starts the blit)
- ; Installs blit32 as the next function; returns with Z clear.
-
- blit31:         move.l  #$0DE42000,(bltcon0,a0) ; D=AC+(B>>2)~C
-                 moveq   #-1,d0
-                 move.l  d0,(bltafwm,a0)         ; first and last word mask
-                 move.w  #$cccc,(bltcdat,a0)
-                 move.w  #2,(bltamod,a0)
-                 move.w  #2,(bltbmod,a0)
-                 move.w  #0,(bltdmod,a0)
-                 move.l  (buff2ptr-mybltnode,a1),d0
-                 move.l  d0,(bltapt,a0)          ; A = buff2
-                 addq.l  #2,d0
-                 move.l  d0,(bltbpt,a0)          ; B = buff2+2
-                 move.l  #buff3,(bltdpt,a0)
-                 move.w  #pixels/8,(bltsizv,a0)
-                 move.w  #1,(bltsizh,a0)         ; do blit
-                 lea     (blit32,pc),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1)
-                 rts
-
- ; blit32 -- pass 3, subpass 2 (descending)
- ;
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff2+pixels/2-4, B = buff2+pixels/2-2, D = buff3+pixels/2-2
- ;   bltcon0/1 = $2DE4/$0002     D = (A<<2)C + B~C, descending
- ;   bltsizh = 1 word (this write starts the blit)
- ; Masks, modulos, C data and bltsizv are not written here; the routine
- ; relies on the values blit31 left in them.
- ; Installs blit43 as the next function; returns with Z clear.
-
- blit32:         move.l  #$2DE40002,(bltcon0,a0) ; D=(A<<2)C+B~C, desc.
-                 move.l  #buff3+pixels/2-2,(bltdpt,a0)
-                 move.l  (buff2ptr-mybltnode,a1),d0
-                 add.l   #pixels/2-2,d0
-                 move.l  d0,(bltbpt,a0)          ; B = last word of buff2
-                 subq.l  #2,d0
-                 move.l  d0,(bltapt,a0)          ; A = the word before it
-                 move.w  #1,(bltsizh,a0)         ; do blit
-                 lea     (blit43,pc),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1)
-                 rts
-
- ;-------------------------------------------------
- ;after pass 3
- ;0 a3a2c3c2e3e2g3g2 i3i2k3k2m3m2o3o2
- ;2 q3q2s3s2u3u2w3w2 y3y2A3A2C3C2E3E2
- ;
- ;pixels/8+0 b3b2d3d2f3f2h3h2 j3j2l3l2n3n2p3p2
- ;pixels/8+2 r3r2t3t2v3v2x3x2 z3z2B3B2D3D2F3F2
- ;
- ;pixels/4+0 a1a0c1c0e1e0g1g0 i1i0k1k0m1m0o1o0
- ;pixels/4+2 q1q0s1s0u1u0w1w0 y1y0A1A0C1C0E1E0
- ;
- ;3*pixels/8+0 b1b0d1d0f1f0h1h0 j1j0l1l0n1n0p1p0
- ;3*pixels/8+2 r1r0t1t0v1v0x1x0 z1z0B1B0D1D0F1F0
- ;-------------------------------------------------
-
- ; blit43 -- pass 4, plane 3 (ascending)
- ;
- ; First function of the chain when wehaveocs is set, so it programs the
- ; complete blitter state itself.
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff3+0*pixels/8, B = buff3+1*pixels/8
- ;   D = fourth pointer of the caller's plane list, plus offset
- ;   all modulos 0, C data $aaaa, first/last word masks all ones
- ;   bltcon0/1 = $0DE4/$1000     D = AC + (B>>1)~C
- ;   bltsize = height rows of width/16 words (this write starts the blit)
- ; Installs blit41 as the next function; returns with Z clear.
- ; a2 is preserved.
-
- blit43:         move.l  a2,-(sp)
-                 move.l  #$0DE41000,(bltcon0,a0) ; D=AC+(B>>1)~C
-                 moveq   #-1,d0
-                 move.l  d0,(bltafwm,a0)         ; first and last word mask
-                 move.w  #$aaaa,(bltcdat,a0)
-                 move.w  #0,(bltamod,a0)
-                 move.w  #0,(bltbmod,a0)
-                 move.w  #0,(bltdmod,a0)
-                 move.l  #buff3+0*pixels/8,(bltapt,a0)
-                 move.l  #buff3+1*pixels/8,(bltbpt,a0)
-                 move.l  (planes-mybltnode,a1),a2
-                 move.l  (3*4,a2),a2
-                 adda.w  #offset,a2
-                 move.l  a2,(bltdpt,a0)          ; Plane3
-                 move.w  #height*64+width/16,(bltsize,a0)
-                 lea     (blit41,pc),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1)
-                 move.l  (sp)+,a2
-                 rts
-
- ;-------------------------------------------------
- ;Plane3 a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3
- ;Plane3+2 q3r3s3t3u3v3w3x3 y3z3A3B3C3D3E3F3
- ;-------------------------------------------------
-
- ; blit41 -- pass 4, plane 1 (ascending)
- ;
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff3+2*pixels/8, B = buff3+3*pixels/8
- ;   D = second pointer of the caller's plane list, plus offset
- ;   bltsize = height rows of width/16 words (this write starts the blit)
- ; Control words, masks, modulos and C data are not written here; the
- ; routine relies on the values blit43 left in them (D = AC + (B>>1)~C).
- ; Installs blit42 as the next function; returns with Z clear.
- ; a2 is preserved.
-
- blit41:         move.l  a2,-(sp)
-                 move.l  (planes-mybltnode,a1),a2
-                 move.l  (1*4,a2),a2
-                 adda.w  #offset,a2
-                 move.l  a2,(bltdpt,a0)          ; Plane1
-                 move.l  #buff3+2*pixels/8,(bltapt,a0)
-                 move.l  #buff3+3*pixels/8,(bltbpt,a0)
-                 move.w  #height*64+width/16,(bltsize,a0)
-                 lea     (blit42,pc),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1)
-                 move.l  (sp)+,a2
-                 rts
-
- ;-------------------------------------------------
- ;Plane1 a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1
- ;Plane1+2 q1r1s1t1u1v1w1x1 y1z1A1B1C1D1E1F1
- ;-------------------------------------------------
-
- ; blit42 -- pass 4, plane 2 (descending)
- ;
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff3+1*pixels/8-2, B = buff3+2*pixels/8-2
- ;   D = third pointer of the caller's plane list + plsiz-2+offset,
- ;       i.e. the last word of the area written in that plane
- ;   bltcon0/1 = $1DE4/$0002     D = (A<<1)C + B~C, descending
- ;   bltsize = height rows of width/16 words (this write starts the blit)
- ; Masks, modulos and C data are left as set by the earlier functions.
- ; Installs blit40 as the next function; returns with Z clear.
- ; a2 is preserved.
-
- blit42:         move.l  a2,-(sp)
-                 move.l  #$1DE40002,(bltcon0,a0) ; D=(A<<1)C+B~C, desc.
-                 move.l  #buff3+1*pixels/8-2,(bltapt,a0)
-                 move.l  #buff3+2*pixels/8-2,(bltbpt,a0)
-                 move.l  (planes-mybltnode,a1),a2
-                 move.l  (2*4,a2),d0
-                 add.l   #plsiz-2+offset,d0
-                 move.l  d0,(bltdpt,a0)          ; Plane2+plsiz-2
-                 move.w  #height*64+width/16,(bltsize,a0)
-                 lea     (blit40,pc),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1)
-                 move.l  (sp)+,a2
-                 rts
-
- ;-------------------------------------------------
- ;Plane2 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
- ;Plane2+2 q2r2s2t2u2v2w2x2 y2z2A2B2C2D2E2F2
- ;-------------------------------------------------
-
- ; blit40 -- pass 4, plane 0 (descending); last function of the chain
- ;
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Registers programmed:
- ;   A = buff3+3*pixels/8-2, B = buff3+4*pixels/8-2
- ;   D = first pointer of the caller's plane list + plsiz-2+offset
- ;   bltsize = height rows of width/16 words (this write starts the blit)
- ; Control words, masks, modulos and C data are left as set by blit42
- ; (D = (A<<1)C + B~C, descending).
- ; Re-arms the node with initblitfunc for the next frame and returns with
- ; d0 = 0 and the Z flag set.
- ; a2 is preserved.
-
- blit40:         move.l  a2,-(sp)
-                 move.l  #buff3+3*pixels/8-2,(bltapt,a0)
-                 move.l  #buff3+4*pixels/8-2,(bltbpt,a0)
-                 move.l  (planes-mybltnode,a1),a2
-                 move.l  (a2),d0
-                 add.l   #plsiz-2+offset,d0
-                 move.l  d0,(bltdpt,a0)          ; Plane0+plsiz-2
-                 move.w  #height*64+width/16,(bltsize,a0)
-                 move.l  (initblitfunc-mybltnode,a1),a0
-                 move.l  a0,(qblitfunc-mybltnode,a1) ; restart the chain next time
-                 moveq   #0,d0                   ; set Z flag
-                 move.l  (sp)+,a2                ; does not touch the flags
-                 rts
-
- ;-------------------------------------------------
- ;Plane0 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
- ;Plane0+2 q0r0s0t0u0v0w0x0 y0z0A0B0C0D0E0F0
- ;-------------------------------------------------
-
- ; qblitcleanup -- cleanup entry stored in mybltnode.
- ; Sends the saved signal mask to the saved task. a6 is preserved.
-
- qblitcleanup:   move.l  a6,-(sp)
-                 move.l  task,a1                 ; task to wake up
-                 move.l  signals,d0              ; signal mask given to c2p4
-                 move.l  (4).w,a6
-                 JSRLIB  Signal                  ; may be called from interrupts
-                 move.l  (sp)+,a6
-                 rts
-
- ;-----------------------------------------------------------------------------
- SECTION "DATA",DATA
-
- ; Blit node handed to QBlit, followed by private fields that the blit
- ; functions reach as offsets from the node pointer (label-mybltnode).
- mybltnode: dc.l 0 ; next bltnode
- qblitfunc: dc.l 0 ; ptr to qblitfunc(); each blit function stores its successor here
- dc.b cleanup ; stat
- dc.b 0 ; filler
- dc.w 0 ; blitsize
- dc.w 0 ; beamsync
- dc.l qblitcleanup ; ptr to qblitcleanup()
-
- CNOP 0,4
- chunky: dc.l 0 ; ptr to original chunky data
- planes: dc.l 0 ; ptr to list of output plane ptrs
- task: dc.l 0 ; ptr to this task
- signals: dc.l 0 ; signals to Signal() at cleanup
- initblitfunc dc.l 0 ; first qblit function (blit43 or blit31, set by _Initc2p4)
- buff2ptr dc.l 0 ; intermediate buffer 2 (AllocVec'd by _Initc2p4, 0 if none)
- wehaveocs dc.w 0 ; OCS installed? -1 = yes (pass 3 done by CPU), 0 = no
-
- ;-----------------------------------------------------------------------------
- SECTION "CHIPBSS",BSS,CHIP ; MUST BE IN CHIP !!!!!
-
- ; buff3 is the destination of pass 3 and the source of the pass-4 blits.
- buff3 ds.b pixels/2 ;Intermediate buffer 3
-
- ;-----------------------------------------------------------------------------
-
- end
-